# Description -------------------------------------------------------------


# To replicate the survey analysis (Figure 4 and Table 5) follow those steps. 
# 
# 1. Open the script "indocrination_survey_analysis.R"
# 2. Set the working directory, so that the file "indoctrination_survey_data.rda" can be found and adjust the "setwd()" command accordingly. 
# 4. Create a "/tables/" and "/figures/" folder in the working directory.
# 3. Run the "indocrination_survey_analysis.R" file, which will produce the figures and tables in the respective subdirectories. 


# Packages ----------------------------------------------------------------



library(tidyverse)
library(lfe)
library(stargazer)
library(modelsummary)
library(ggsci)
library(patchwork)
library(stargazer)
library(gt)


# loads "dd_sample"
load("./data/dd_sample_surveys.rda")


# Survey Summary Statistics -----------------------------------------------

dd_sumstats <- dd_sample %>% 
  select(`Socialist Preferences Index (standardized)` = soc_index_count, 
         `Socialist Preferences Index (raw scores)` = soc_index_count_raw,
         `Age` = age)


stargazer(dd_sumstats %>% as.data.frame(), digits = 2, type = "latex", 
          summary = T,
          summary.stat = c("n", "mean", "median", "sd", "min", "max"),
          title = c("Obs", "Mean", "Median", "SD", "Min", "Max"),
          out = "./Tables/NVA_Tables/survey_summary_statistics.tex", 
          float = F)



# Survey DD plots -------------------------------------------------------------



# define jitter parameters
jitter.width = 0.25
jitter.height = 0.05
dodge.width = 0.5

# define global jitter position

jd_position <- position_jitterdodge(jitter.width = jitter.width, 
                                    jitter.height = jitter.height,
                                    dodge.width = dodge.width)

d_position <- position_dodge(width = 0.5)


# data preparation for standardized sample
dd_sample_stand <- dd_sample %>% 
  group_by(survey_period, wehrdienst_nva) %>% 
  mutate(mean_soc = mean(soc_index_count, na.rm  =T)) %>% 
  ungroup() %>% 
  mutate(survey_period = forcats::fct_relevel(survey_period, c("1983", "1985",  "1989", "1991"))) %>% 
  arrange(survey_period, wehrdienst_nva) %>% 
  filter(!is.na(wehrdienst_nva))


# build standardized plot
dd_plot_standardized <- ggplot(dd_sample_stand , 
                               aes(x = survey_period, 
                                   y = soc_index_count, 
                                   fill = factor(wehrdienst_nva), 
                                   color = factor(wehrdienst_nva))) + 
  geom_point(position = jd_position, 
             size = 1.5, 
             shape = 21, color = "grey",
             alpha = 0.1) + 
  geom_line(data = dd_sample_stand %>% 
              group_by(survey_period, wehrdienst_nva) %>% 
              filter(row_number() == min(row_number())) %>% 
              ungroup() %>% 
              arrange(wehrdienst_nva, survey_period), 
            aes(group = factor(wehrdienst_nva), 
                y = mean_soc), 
            position = d_position) +
  geom_point(data = dd_sample_stand %>% 
               group_by(survey_period, wehrdienst_nva) %>% 
               filter(row_number() == min(row_number())) %>% 
               ungroup() %>% 
               arrange(wehrdienst_nva, survey_period), 
             aes(x = survey_period, 
                 fill = factor(wehrdienst_nva),
                 group = factor(wehrdienst_nva), 
                 y = mean_soc), 
             color= "black",
             position = d_position,
             size = 6, pch = 21) +
  
  theme_bw() +
  labs(y = "Socialist Preferences Index\n(Standardized)", x = "Survey Period", 
       caption = "\nLarge points represent group means; smaller transparent points display jittered individual respondent values")



# plot raw scores

dd_sample_raw <- dd_sample %>% 
  group_by(survey_period, wehrdienst_nva) %>% 
  mutate(mean_soc = mean(soc_index_count_raw, na.rm  =T)) %>% 
  ungroup() %>% 
  mutate(survey_period = forcats::fct_relevel(survey_period, 
                                              c("1983", "1985",  "1989", "1991"))) %>% 
  arrange(survey_period, wehrdienst_nva) %>%  
  filter(!is.na(wehrdienst_nva))


dd_plot_raw <- ggplot(dd_sample_raw %>% 
                        ungroup() , 
                      aes(x = survey_period, 
                          y = soc_index_count_raw, 
                          fill = factor(wehrdienst_nva), 
                          color = factor(wehrdienst_nva))) + 
  geom_point(position = jd_position, 
             size = 1.5, 
             shape = 21, color = "grey",
             alpha = 0.1) + 
  geom_line(data = dd_sample_raw %>% 
              group_by(survey_period, wehrdienst_nva) %>% 
              filter(row_number() == min(row_number())) %>% 
              ungroup() %>% 
              arrange(wehrdienst_nva, survey_period), 
            aes(group = factor(wehrdienst_nva), 
                y = mean_soc), 
            position = d_position) +
  geom_point(data = dd_sample_raw %>% 
               group_by(survey_period, wehrdienst_nva) %>% 
               filter(row_number() == min(row_number())) %>% 
               ungroup() %>% 
               arrange(wehrdienst_nva, survey_period), 
             aes(x = survey_period, 
                 fill = factor(wehrdienst_nva),
                 group = factor(wehrdienst_nva), 
                 y = mean_soc), 
             color= "black",
             position = d_position,
             size = 6, pch = 21) +
  
  theme_bw() +
  labs(y = "Socialist Preferences Index\n(Raw Scores)", x = "Survey Period")


dd_plot_combined <- (dd_plot_raw + dd_plot_standardized)  + plot_layout(guides = "collect")

dd_plot_combined <- dd_plot_combined & theme(legend.position = "bottom") & 
  ggsci::scale_fill_startrek(name = "NVA Military Service") & 
  ggsci::scale_color_startrek(name = "NVA Military Service")  

print(dd_plot_combined)

ggsave(plot = dd_plot_combined, filename = "./figures/dd_plot.pdf", 
       width = 12, height = 6, scale = 0.8)



# Survey DD Models ---------------------------------------------------------------


# models
dd_model_index_base <-felm(soc_index_count_raw ~ 
                             I(survey_period == "1991") * wehrdienst_nva 
                           
                           | survey_period | 0 | id, data = dd_sample) 

dd_model_index_control <- felm(soc_index_count_raw ~ 
                                 I(survey_period == "1991") * wehrdienst_nva + 
                                 age +
                                 I(age^2)
                               
                               | survey_period + interview_month | 0 | id, data = dd_sample) 

dd_model_index_base_stand <-felm(soc_index_count ~ 
                                   I(survey_period == "1991") * wehrdienst_nva 
                                 
                                 | survey_period | 0 | id, data = dd_sample) 

dd_model_index_control_stand <- felm(soc_index_count ~ 
                                       I(survey_period == "1991") * wehrdienst_nva + 
                                       age +
                                       I(age^2)
                                     
                                     | survey_period  + interview_month | 0 | id, data = dd_sample) 


# model output
coef_map <- c("I(survey_period == \"1991\")TRUE:wehrdienst_nva" = "Post 1990 x NVA Military Service", 
              "wehrdienst_nva" = "Wehrdienst NVA", 
              "sed_dummy" = "SED Member", 
              "age" = "Age", 
              "I(age^2)" = "Age^2")


rows <- tibble::tribble(~term, ~`Model 1`, ~`Model 2`, ~`Model 3`, ~`Model 4`,
                          "Survey Period FE", "Yes", "Yes", "Yes", "Yes",

             "Interview Month FE", "No", "Yes", "No", "Yes")

modelsummary(list(dd_model_index_base,
                  dd_model_index_control, 
                  dd_model_index_base_stand, 
                  dd_model_index_control_stand), 
             output = "gt",
             coef_map = coef_map,
             gof_omit = 'IC|Log|Adj',
              add_rows = rows,
             # add_rows_location = 0,
             stars = T) %>% 
  tab_spanner(label = "Socialist Preferences Index\n(raw scores)", columns = 2:3) %>%
  tab_spanner(label = 'Socialist Preferences Index\n(standardized)', columns = 4:5) %>% 
  
  # replicate booktabs: remove horizontal rules
  tab_style(style = cell_borders(
    sides = "all", color = "#000000", style = "solid", weight = px(0)
  ), 
  locations = cells_body(
    columns = everything(),
    rows = everything()
  )) %>% 
  
  # tabs before GoF stats
  tab_style(style = cell_borders(
    sides = "bottom", color = "#D3D3D3", style = "solid", weight = px(2)
  ), 
  locations = cells_body(
    
    rows = 10
  )) %>% 
  gt::gtsave(., filename = "survey_dd.tex", path = "./tables/")

